Preparation for occupancy modelling with unmarked - Exercises

Course 3 b Exercises

Exercise 1 - Bernoulli random variables

## get working directory, load custom function
wd<-here::here() 

source(paste0(wd, '/R/Course3_b_presence_absence_occupancy/Occu_fun.R'))

##rbinom() - a function for a virtual coin flip

## returns:
## 1 = heads
## 0 = tails

## a fair coin (prob=0.5)
rbinom(n=1, size=1, prob=0.5)
## [1] 0
## for species occurrence, each site/location has some 
## probability of occurrence and whether or not a species
## actually occurs is a (unfair) coin flip 

## the function returns a hypothetical landscape, in which grid cells are
## either occupied (black dot) or not by a hypothetical species
## the color of the grid cells encodes the probability that a cell is occupied
occu_fun()

How to estimate occupancy probability from the occu_fun output

## Imagine the function returned: 
successes<-55 ## occupied sites
trials<-100   ## all available sites/cells in the landscape


##Likelihood: what is the likelihood of observing x successes
##            out of n trials for a given value of psi (occupancy probability)

## possible values of psi
## we leave out 0 and 1 as special cases
psi<-seq(0.01, 0.99, 0.01)

## dbinom: likelihood
lik<-dbinom(successes, trials, psi)
round(lik, dig=3)
##  [1] 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000
## [13] 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000
## [25] 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000
## [37] 0.000 0.000 0.000 0.001 0.002 0.003 0.004 0.007 0.011 0.016 0.022 0.030
## [49] 0.039 0.048 0.058 0.067 0.074 0.078 0.080 0.078 0.074 0.067 0.058 0.048
## [61] 0.038 0.029 0.021 0.015 0.010 0.006 0.004 0.002 0.001 0.001 0.000 0.000
## [73] 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000
## [85] 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000
## [97] 0.000 0.000 0.000
## plot the likelihood against possible values of psi
## mark the point where the likelihood is highest --> maximum likelihood
plot(psi, lik, type='l')
abline(v=psi[lik==max(lik)])

## This is the fundamental principle of ML estimation

## in this simple case: closed form estimator of psi
successes/trials
## [1] 0.55

explore how many cells are occupied for different levels of psi

occu_fun(psi=0.6)

Exercise 2: Imperfect detection

## circles mark occupied sites
## filled circles are sites at which species is detected
## open circles are sites at which species is present but 
## not detected
set.seed(123) #ensures we all generate the same numbers
detec_fun()

## Q1: What would be your best estimate of occupancy
##     probability if you only considered cells with detections
##     as occupied?
## Q2: What is the best estimate of detection probability?


## play with different levels of p and psi
## When is the proportion of occupied sites without detections high, when low?

## Remember: p = detection probability, psi = occupancy probability
detec_fun(p=0.8, psi=0.1)

Exercise 3: Sampling effort and probability of non-detection

## change K (number of visits to each site); default = 1

detec_fun(K=2)

## What happens as K increases?


## calculate probability of not detecting the species in K 
## surveys even though it is present for p=0.5 and a range of K
## from 1 to 10

K<-1:10
p<-0.5
Pnon<-(1-p)^K
plot(K, Pnon)

## same with p=0.2
p<-0.2
Pnon<-(1-p)^K
plot(K, Pnon)

## visualize the relationship between p(non-detection), p and K
ps<-seq(0.1, 0.9, 0.1)

plot(K, seq(0, 1, length.out=length(K)), col='white',
     ylab='P(nondet)')
for(i in 1:length(ps)){
  points(K, (1-ps[i])^K, type='l',
         col=i)
}

## the lower p, the higher is p(non-detection)
## the higher K, the lower is p(non-detection)
## but for very low p, even high K still miss the species a lot

Exercise 4: covariates on occupancy and detection

covariate on occupancy

## fake landscape with covariate positively affecting occupancy

##lighter color = higher occu prob
occu_fun(covar=TRUE)

##look at occurrences against covariate gradient by having the 
## function return the generated data

dat<-occu_fun(covar=TRUE, return=TRUE)

##outcome (here, called 'occ') vs predictor (here, called 'xp')
plot(dat$xp, dat$occ, pch=19,
     xlab='X', ylab='Occupancy (probability)')

## occupancy probability (here, called 'z') vs predictor 
points(dat$xp, dat$z, pch=19, col='forestgreen')

covariate on detection

detec_fun(covar=TRUE)

## plot is not helpful as it displays occupancy probability

## so, have function return data
datp<-detec_fun(covar=TRUE, return=TRUE)

##look at relationship between occupied cells ('occ') and covariate
## on detection 'xp'
plot(datp$xp, datp$occ, pch=19,
     xlab='X', ylab='Occupancy (probability)')

##occupancy probability ('z') vs predictor
points(datp$xp, datp$z, pch=19, col='forestgreen')

##there is no relationship (because the covariate affects p, not psi) 

##look at relationship of detections ('y') and covariate
## only consider occupied cells
plot(datp$xp[dat$occ == 1], datp$y[dat$occ == 1], pch=19,
     xlab='X', ylab='Detection (probability)')

##detection probability vs predictor
points(datp$xp, datp$p, pch=19, col='forestgreen')

OPTIONAL: What if we ignore the covariate effect on detection?

## Let's look at cells with and cells without detections in
## relation to the covariate
## This ignores imperfect detection (ie, cells that are occupied but where we
## did not detect the species - open circles)
plot(datp$xp, datp$y, pch=19,
     xlab='X', ylab='Observed occupancy (probability)')

##logistic regression: observed occupancy (ignoring imperfect detection)
## as a function of predictor xp, which in truth affects detection probability
mm<-glm(y~xp, data=datp, family='binomial')
pp<-predict.glm(mm, type='response')
points(datp$xp, pp,  pch=19, col='forestgreen')

## We would falsely conclude that the species' occupancy probability is positively
## related to the predictor variable, when in fact, it's detection probability
## that is positively related to the predictor



Session Info
Sys.time()
## [1] "2026-02-02 13:51:39 CET"
#git2r::repository() ## uncomment if you are using GitHub
sessionInfo()
## R version 4.5.1 (2025-06-13 ucrt)
## Platform: x86_64-w64-mingw32/x64
## Running under: Windows 10 x64 (build 19044)
## 
## Matrix products: default
##   LAPACK version 3.12.1
## 
## locale:
## [1] LC_COLLATE=German_Germany.utf8  LC_CTYPE=German_Germany.utf8   
## [3] LC_MONETARY=German_Germany.utf8 LC_NUMERIC=C                   
## [5] LC_TIME=C                      
## 
## time zone: Europe/Berlin
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## loaded via a namespace (and not attached):
##  [1] digest_0.6.39     R6_2.6.1          bookdown_0.46     fastmap_1.2.0    
##  [5] xfun_0.54         cachem_1.1.0      knitr_1.50        htmltools_0.5.9  
##  [9] rmarkdown_2.30    lifecycle_1.0.4   cli_3.6.5         sass_0.4.10      
## [13] textshaping_1.0.3 jquerylib_0.1.4   systemfonts_1.3.0 compiler_4.5.1   
## [17] rprojroot_2.1.1   here_1.0.2        rstudioapi_0.17.1 tools_4.5.1      
## [21] ragg_1.5.0        evaluate_1.0.5    bslib_0.9.0       rmdformats_1.0.4 
## [25] yaml_2.3.11       jsonlite_2.0.0    rlang_1.1.6